# R code for constructing figures from the manuscript
# S van Buuren (2006), Worm plot to diagnose fit in quantile regression
# July 2006

library(quantreg)
library(splines)
library(gamlss)
dutchboys <- read.table(file="S:\\projecten\\a-i\\Groeistat\\Qreg\\db7482.txt",header=T,sep="\t")

# meaning of the columns in data frame ducthboys:
# defnr	identification number
# age		age in decimal years
# hgt		length/height in cm
# wgt		weight in kg
# hc		head circumference in cm
# hgt.z	z-score lengt/height 
# wgt.z	z-score weight
# hc.z	z-score head circumference
# bmi.z	z-score body mass index
# hfw.z	z-score height for weight
# z-scores were calculated relative to the Dutch references (Fredriks et al, 2000).

# --- Figure 2: plot of the data
plot(x=dutchboys[,"age"],y=dutchboys[,"hgt"],xlab="Age (years)",ylab="Height (cm)",cex=0.4)


# --- Figure 3: Worm plot of the LMS solution using gamlss
wp.data <- na.omit(dutchboys[,c("defnr","age","hgt","hgt.z")])
fit <- gamlss(hgt.z~0, data=wp.data)  # fit null model to create a gamlss object
wp(fit,xvar=wp.data$age,n=16,ylim.worm=0.5,show=F,cex=0.6)
title("LMS model (L=0,M=10,S=6)",cex=0.6)


# --- Figure 4: example that the conventional QQplot can be misleading (three plots)
sinus <- data.frame(x=x<-runif(1000,0,pi),y=sin(x)+rnorm(1000,0,0.2))

# --- Figure 4a: data
plot(sinus,xlab="x",ylab="sin(x)+error")
fit <- rq(y~1+x,data=sinus)
abline(fit)

# --- Figure 4b: detrended QQ plot
tau.fine <- c((1:9)/1000,(1:99)/100,(991:999)/1000)
fit.fine <- rq(y~1+x,data=sinus,tau=tau.fine)
table.ages <- seq(0,pi,by=pi/16)
ref.table.fine <- predict.rq(fit.fine,data.frame(x<-table.ages))
extract.y <- function(lst)
{	r <- matrix(NA,ncol=length(lst),nrow=length(lst[[1]]$y))
	for(i in 1:length(lst))
      	r[,i] <- lst[[i]]$y
	return(r)
}
extract.p <- function(mat)
{	r <- rep(NA, nrow(mat))
	for(i in 1:length(r))
		if(!is.na(mat[i,1]))r[i] <- approx(mat[i,],y=tau.fine,xout=sinus[i,"y"],rule=2)$y
	return(r)
}
inter1 <- apply(ref.table.fine, 2, approx, x=table.ages, xout=sinus[,"x"])
inter2 <- extract.y(inter1)
emperc <- extract.p(inter2)
# if model fits well, then emperical percentiles should be uniform
# hist(emperc)

wp.data <- na.omit(cbind(sinus[,c("x","y")],hgt.z.qr=qnorm(emperc)))
fit <- gamlss(hgt.z.qr~0, data=wp.data) # fit empty model to construct gamlss object
wp(fit,xvar=NULL,ylim.worm=0.5,show=F, cex=0.6)
title("Unconditional QQ plot",cex=0.6)

# --- Figure 4c: detrended QQ plot
wp(fit,xvar=wp.data$x,n.inter=4,ylim.worm=0.5,show=F, cex=0.6)
title("Conditional QQ plot",cex=0.6)



# --- Calculate for Figure 5: Worm plot for regression quantiles applied to Dutch boys

# calculate B-spline basis of Wei et al (2006), Stat Med
age.knots <- c(0.2,0.5,1.0,1.5,2.0,5.0,8.0,10.0,11.5,13.0,14.5,16.0)
basis <- bs(dutchboys[,"age"],knots=age.knots,Boundary.knots=c(0,20))

# calculate reference table
table.ages <- c(1:11/12,2:40/2)
tau <- pnorm(c(-2.5,-2,-1,0,1,2,2.5))
fit <- rq(hgt~bs(age,knots=age.knots,Bo=c(0,20)),data=dutchboys,tau=tau)
ref.table <- cbind(age=table.ages,predict.rq(fit,data.frame(age<-table.ages)))

# calculate the emperical percentiles
tau.fine <- c((1:9)/1000,(1:99)/100,(991:999)/1000)
fit.fine <- rq(hgt~bs(age,knots=age.knots,Bo=c(0,20)),data=dutchboys,tau=tau.fine)
ref.table.fine <- predict.rq(fit.fine,data.frame(age<-table.ages))
extract.y <- function(lst)
{	r <- matrix(NA,ncol=length(lst),nrow=length(lst[[1]]$y))
	for(i in 1:length(lst))
      	r[,i] <- lst[[i]]$y
	return(r)
}
extract.p <- function(mat)
{	r <- rep(NA, nrow(mat))
	for(i in 1:length(r))
		if(!is.na(mat[i,1]))r[i] <- approx(mat[i,],y=tau.fine,xout=dutchboys[i,"hgt"],rule=2)$y
	return(r)
}
inter1 <- apply(ref.table.fine, 2, approx, x=table.ages, xout=dutchboys[,"age"])
inter2 <- extract.y(inter1)
emperc <- extract.p(inter2)

# if model fits well, then emperical percentiles should be uniform
# hist(emperc)

# --- Figure 5: Worm plot of the quantile regression

# create worm plot (using the gamlss implementation)
wp.data <- na.omit(cbind(dutchboys[,c("defnr","age","hgt","hgt.z")],hgt.z.qr=qnorm(emperc)))
fit <- gamlss(hgt.z.qr~0, data=wp.data) # fit empty model to construct gamlss object
z <- wp(fit,xvar=wp.data$age,n=16,ylim.worm=0.5,show=F, cex=0.6)
title("Quantiles regression using 15 B-splines",cex=0.5)

# uncomment next line if you want to draw the worm plot in S Plus, export the data
# write.table(wp.data,row.names=F,file="..\\Qreg\\wpdata.txt",sep="\t")


# --- Figure 6: Standard reference quantiles

# fit the regression quantiles
tau.sd <- c(-2.5,-2,-1, 0, 1, 2, 2.5)
tau <- c(pnorm(tau.sd))
fit <- rq(hgt~age, data=dutchboys,tau=tau)

# plot the references (infants)
plot(x=ref.table[1:15,"age"],y=ref.table[1:15,2],ylim=c(50,105),xlim=c(0,2.6),type="n",
     xlab="Age (years)",ylab="Length/Height (cm)")
# title("Dutch boys (0.25 to 2.5 years) - QR length/height references")
matlines(x=ref.table[1:15,"age"],y=ref.table[1:15,2:8],lty=1,col=1)
text(x=2.65,y=ref.table[15,2:8],labels=tau.sd,adj=c(1,0.5),cex=0.7)
points(x=dutchboys[,"age"],y=dutchboys[,"hgt"],cex=0.3)

# --- figure 6a - Dutch references (LMS model)
reference <- read.table(file="S:\\projecten\\a-i\\Groeistat\\Qreg\\lnj.txt",header=T,sep="\t")
plot(x=reference[31:67,"jaar"],y=reference[31:67,"SD0"],ylim=c(80,200),xlim=c(2,21),type="n",
     xlab="Age (years)",ylab="Height (cm)")
title("LMS model")
matlines(x=reference[31:67,"jaar"],y=reference[31:67,c(18,19,21,23,25,27,28)],lty=1,col=1)
text(x=21,y=reference[67,c(18,19,21,23,25,27,28)],labels=tau.sd,adj=c(1,0.5),cex=0.7)

# --- figure 6b - Dutch references (QR model)
# plot the references (children)
plot(x=ref.table[14:50,"age"],y=ref.table[14:50,2],ylim=c(80,200),xlim=c(2,21),type="n",
     xlab="Age (years)",ylab="Height (cm)")
title("QR model")
matlines(x=ref.table[14:50,"age"],y=ref.table[14:50,2:8],lty=1,col=1)
text(x=21,y=ref.table[50,2:8],labels=tau.sd,adj=c(1,0.5),cex=0.7)

